library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.6 ✔ purrr 0.3.4
✔ tibble 3.1.7 ✔ dplyr 1.0.9
✔ tidyr 1.2.0 ✔ stringr 1.4.0
✔ readr 2.1.2 ✔ forcats 0.5.1
── Conflicts ───────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(scales)
Attaching package: ‘scales’
The following object is masked from ‘package:purrr’:
discard
The following object is masked from ‘package:readr’:
col_factor
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(lubridate)
Attaching package: ‘lubridate’
The following objects are masked from ‘package:base’:
date, intersect, setdiff, union
library(sf)
Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(zoo)
Attaching package: ‘zoo’
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
#load in beds and add year column
beds <- read_csv("raw_data/non_covid_raw_data/beds_by_nhs_board_of_treatment_and_specialty.csv") %>% janitor::clean_names()
beds <- beds %>%
mutate(quarter_date = yq(quarter),
year = year(quarter_date),
.after = quarter)
beds %>%
write_csv("clean_data/non_covid_data/beds.csv")
# bed percentage availablity for "all acute"
# Will need to add filter for year based on user input
beds_plotly <- beds %>%
filter(specialty_name == "All Acute") %>%
group_by(quarter, specialty_name) %>%
summarise(mean_perc_occ = mean(percentage_occupancy)) %>%
ggplot(aes(x = quarter, y = mean_perc_occ))+
geom_line(aes(colour = specialty_name, group = specialty_name))+
geom_point(aes(
text = paste0("Occupancy: ",round(mean_perc_occ, digits = 2),"%<br>","Date: ", quarter)),
size = 0.5)+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(title = "Mean bed availability for all Acute Patients",
x = "\nYear and Quarter",
y = "Average Percentage Occupancy")
ggplotly(beds_plotly, tooltip = "text") %>%
config(displayModeBar = FALSE) %>%
layout(hoverlabel=list(bgcolor="red"))
#shows number of attendances at A&E during covid
# Title = Number of attendances at A&E 2020 - 2022
covid_ae_attendance_plotly <- covid_ae_attendance %>%
filter(hb_name == "All Scotland") %>%
ggplot() +
geom_point(aes(x = date,
y = num_attendances,
text = paste0("Date: ", year, "-", month,
"<br>",
"Number of admissions: ", num_attendances,
"<br>",
"2017-2019 avg admissions: ",
round(avg_attendances_20171819))
)) +
geom_line(aes(x = date,
y = num_attendances)) +
geom_line(aes(x = date,
y = avg_attendances_20171819),
colour = "grey60", alpha = 0.5) +
scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size =7)) +
geom_vline(xintercept = as.numeric(as.Date("2020-01-01")), linetype=4, colour = "grey50")+
geom_vline(xintercept = as.numeric(as.Date("2021-01-01")), linetype=4, colour = "grey50")+
geom_vline(xintercept = as.numeric(as.Date("2022-01-01")), linetype=4, colour = "grey50")+
labs(title = "Comparison with 2018-2019 averages \n",
x = "Date",
y = "Number of Attendances")
covid_ae_attendance_plotly %>%
ggplotly(tooltip = "text") %>%
config(displayModeBar = FALSE) %>%
layout(hoverlabel = list(bgcolor = "white"))
#shows destination breakdown for A&E attendances during covid
#Title: Destination of attendances at A&E 2020 - 2022
covid_ae_destinations_plotly <- covid_ae_attendance %>%
filter(hb_name == "All Scotland") %>%
ggplot() +
geom_point(aes(x = date,
y = destination_prop,
colour = destination,
text = paste0("Date: ", year, "-", month,
"<br>",
"Percentage: ",
round(destination_prop*100, digits = 2),
"%",
"<br>",
"2017-2019 percentage: ",
round(avg_prop_20171819*100, digits = 2),
"%"))) +
geom_line(aes(x = date,
y = destination_prop,
group = destination)) +
geom_line(aes(x = date,
y = avg_prop_20171819,
colour = destination,
group = destination), alpha = 0.5) +
scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
scale_y_sqrt() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size =7)) +
geom_vline(xintercept = as.numeric(as.Date("2020-01-01")), linetype=4, colour = "grey50")+
geom_vline(xintercept = as.numeric(as.Date("2021-01-01")), linetype=4, colour = "grey50")+
geom_vline(xintercept = as.numeric(as.Date("2022-01-01")), linetype=4, colour = "grey50")+
labs(title = "Comparison with proportion of attendances at A&E 2017 - 2019 \n",
x = "Date",
y = "Proportion of attendances",
colour = "Destination")
covid_ae_destinations_plotly %>%
ggplotly(tooltip = "text") %>%
config(displayModeBar = FALSE)
ae_wait_times <- read_csv("raw_data/non_covid_raw_data/monthly_ae_waitingtimes_202206.csv") %>% janitor::clean_names()
#make a date and year column with the first date of every month
ae_wait_times <- ae_wait_times %>%
mutate(date = ym(month), .after = month,
year = year(date))
#make a percent column with percent of patients meeting the 4hr target time
ae_wait_times <- ae_wait_times %>%
mutate(percent_4hr_target_achieved = (number_meeting_target_aggregate/number_of_attendances_aggregate)*100)
ae_wait_times %>%
write_csv("clean_data/non_covid_data/ae_wait_times.csv")
#write in the target data for the shapefile colours
target_2007 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2007) %>%
rename(ae_target_2007 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2007)
target_2008 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2008) %>%
rename(ae_target_2008 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2008)
target_2009 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2009) %>%
rename(ae_target_2009 = ae_4hr_target_achieved) %>%
ungroup() %>%
selecat(hbt,ae_target_2009)
target_2010 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2010) %>%
rename(ae_target_2010 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2010)
target_2011 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2011) %>%
rename(ae_target_2011 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2011)
target_2012 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2012) %>%
rename(ae_target_2012 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2012)
target_2013 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2013) %>%
rename(ae_target_2013 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2013)
target_2014 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2014) %>%
rename(ae_target_2014 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2014)
target_2015 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2015) %>%
rename(ae_target_2015 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2015)
target_2016 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2016) %>%
rename(ae_target_2016 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2016)
target_2017 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2017) %>%
rename(ae_target_2017 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2017)
target_2018 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2018) %>%
rename(ae_target_2018 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2018)
target_2019 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2019) %>%
rename(ae_target_2019 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2019)
target_2020 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2020) %>%
rename(ae_target_2020 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2020)
target_2021 <- ae_wait_times %>%
group_by(year, hbt) %>%
summarise(ae_4hr_target_achieved =
round(mean(percent_4hr_target_achieved, na.rm = TRUE), digits = 2)) %>%
filter(year == 2021) %>%
rename(ae_target_2021 = ae_4hr_target_achieved) %>%
ungroup() %>%
select(hbt,ae_target_2021)
scotland <- st_read("../SG_NHS_HealthBoards_2019_shapefile/SG_NHS_HealthBoards_2019.shp")
# make a smaller version for performance issues
scotland_smaller <- scotland %>%
st_simplify(TRUE, dTolerance = 2000)
#fixes problems caused by above
scotland_smaller <- sf::st_cast(scotland_smaller, "MULTIPOLYGON")
# centres <- scotland_smaller %>%
# mutate(centres = st_centroid(st_make_valid(geometry))) %>%
# mutate(lat = st_coordinates(centres)[,1],
# long = st_coordinates(centres)[,2])a
#add in the A&E 4 hr target data for each year
scotland_smaller <- scotland_smaller %>%
mutate(target_2007 = target_2007$ae_target_2007,
target_2008 = target_2008$ae_target_2008,
target_2009 = target_2009$ae_target_2009,
target_2010 = target_2010$ae_target_2010,
target_2011 = target_2011$ae_target_2011,
target_2012 = target_2012$ae_target_2012,
target_2013 = target_2013$ae_target_2013,
target_2014 = target_2014$ae_target_2014,
target_2015 = target_2015$ae_target_2015,
target_2016 = target_2016$ae_target_2016,
target_2017 = target_2017$ae_target_2017,
target_2018 = target_2018$ae_target_2018,
target_2019 = target_2019$ae_target_2019,
target_2020 = target_2020$ae_target_2020,
target_2021 = target_2021$ae_target_2021
)
# save the map
st_write(scotland_smaller, "clean_data/shapefile/scotland_smaller.gpkg", append = FALSE)
## reload the map
scotland_smaller <- st_read("clean_data/shapefile/scotland_smaller.gpkg")
# # This will require filtered by the year selected in the dashboard
# # Can we get the button or dropdown to pass eg "target_2016" to this in 2 places?
p <- ggplot(scotland_smaller) +
geom_sf(aes(fill = target_2021,
text = paste("<b>", HBName, "</b>\n",
round(target_2021, digits = 2),"%", sep = ""))) +
scale_fill_viridis_c(option = "plasma", name = "4Hr A&E Target %")+
theme_void()+
labs(title = "Percent of A&E depts making the 4hr target")
p %>%
ggplotly(tooltip = "text") %>%
style(hoverlabel = list(bgcolor = "white"), hoveron = "fill")%>%
config(displayModeBar = FALSE)
simd <- read_csv("raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv") %>% janitor::clean_names()
simd <- simd %>%
mutate(quarter_date = yq(quarter),
year = year(quarter_date),
.after = quarter)
simd %>%
write_csv("clean_data/non_covid_data/simd.csv")
# average episodes by SIMD value
# currently unfiltered for health board or admission type etc
simd_plotly <- simd %>%
drop_na(simd) %>%
mutate(simd = as.factor(simd)) %>% # gives each simd a separate colour
group_by(quarter, simd) %>%
summarise(avg_episodes = mean(episodes, na.rm = TRUE)) %>%
ggplot(aes(x = quarter, y = avg_episodes, group = simd))+
geom_line(aes(colour = simd))+
geom_point(aes(text = paste0("Date: ", quarter, "<br>",
"Average Episodes: ", round(avg_episodes, digits = 2), "<br>",
"SIMD: ", simd),
colour = simd),size = 0.5)+
scale_y_continuous(labels = scales::comma)+
labs(title = "Average Hospital Episodes by SIMD Deprivation score\n",
x = "\nYear and Quarter",
y = "Average Episodes\n")+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(simd_plotly, tooltip = "text") %>%
config(displayModeBar = FALSE)
age_sex <- read_csv("raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_age_and_sex.csv") %>% janitor::clean_names()
age_sex <- age_sex %>%
mutate(quarter_date = yq(quarter),
year = year(quarter_date),
.after = quarter)
age_sex %>%
write_csv("clean_data/non_covid_data/age_sex.csv")
# Average number of episode for age groups
# currently unfiltered by department or anything else
age_plotly <- age_sex %>%
#filter(min_date < year & year < max_date) %>%
group_by(quarter, age) %>%
summarise(avg_episodes = mean(episodes, na.rm = TRUE)) %>%
ggplot(aes(x = quarter, y = avg_episodes))+
geom_line(aes(colour = age, group = age))+
geom_point(aes(colour = age,
text = paste0("Date: ", quarter, "<br>",
"Average Episodes: ", round(avg_episodes, digits = 2), "<br>",
"Age Group: ", age)),size = 0.5)+
labs(title = "Average Hospital Episodes by Age Groups\n",
x = "\nYear and Quarter",
y = "Average Episodes\n")+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(age_plotly, tooltip = "text") %>%
config(displayModeBar = FALSE)
hb_agesex <- read_csv("clean_data/covid_agesex.csv")
Rows: 43516 Columns: 16
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (10): month, hb, hbqf, age_group, age_group_qf, sex, sex_qf, admission_type, admission_type_qf, hb_name
dbl (4): year, number_admissions, average20182019, percent_variation
lgl (1): is_winter
date (1): week_ending
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#title: Average admissions by age group 2020-2022
covid_age_plotly <- hb_agesex %>%
filter(admission_type == "Emergency",
age_group != "All ages",
hb_name == "All Scotland") %>%
group_by(hb_name, age_group, is_winter) %>%
summarise(mean_admissions = mean(number_admissions),
mean_20182019_admissions = mean(average20182019)) %>%
ggplot() +
geom_col(aes(x = ordered(age_group, levels = c("Under 5",
"5 - 14",
"15 - 44",
"45 - 64",
"65 - 74",
"75 - 84",
"85 and over")),
y = mean_admissions,
fill = if_else(is_winter == TRUE,
"Winter", "Not winter"),
text = paste0("Age group: ", age_group,
"<br>",
"Average number of admissions: ",
round(mean_admissions),
"<br>",
"2018/2019 avg admissions: ",
round(mean_20182019_admissions))),
position = "dodge") +
labs(title = "Comparison between winter and non-winter months",
x = "\n Age group",
y = "Mean number of admissions",
fill = "Season")
`summarise()` has grouped output by 'hb_name', 'age_group'. You can override using the `.groups` argument.
Warning: Ignoring unknown aesthetics: text
covid_age_plotly %>%
ggplotly(tooltip = "text") %>%
config(displayModeBar = FALSE)
library(leaflet)
library(sf)
# scotland_smaller <- readOGR("../SG_NHS_HealthBoards_2019_shapefile/",layer = "SG_NHS_HealthBoards_2019")
# shapeData <- spTransform(scotland_smaller, CRS("+proj=longlat +ellps=GRS80"))
# scotland <- scotland %>%
# mutate(centres = st_centroid(st_make_valid(geometry))) %>%
# mutate(lat = st_coordinates(centres)[,1],
# long = st_coordinates(centres)[,2])
library(here)
here()
#read the geo package in
scotland = st_read("clean_data/shapefile/scotland_smaller.gpkg")
#transform so leaflet is happy with it
scotland <- st_transform(scotland, '+proj=longlat +datum=WGS84')
pal <- colorNumeric("viridis", NULL) # set colour palette
# open up leaflet and add scotland as data, along with other variables
m <- leaflet()
m %>% addTiles() %>%
addPolygons(data=scotland,
smoothFactor = 0.3,
fillOpacity = 1,
fillColor = ~pal(target_2007),
label = ~paste0( HBName,": ", target_2007),
weight = 1,
highlightOptions = highlightOptions(color = "white",
weight = 2,
bringToFront = TRUE)) %>%
addLegend(pal = pal, values = scotland$target_2007, opacity = 1)
#read in clean dataset
waiting_times <- read_csv("clean_data/wait_times.csv")
#graph of A&E destination breakdown
winter_plotly <- waiting_times %>%
filter(discharge_destination == "Admission to Same Facility",
!is.na(discharge_proportion)) %>%
group_by(date) %>%
summarise(mean_admission_to_same = mean(discharge_proportion)) %>%
ggplot() +
geom_point(aes(x = date,
y = mean_admission_to_same,
text = paste0("Date: ", date,
"<br>",
"Percentage: ",
round(mean_admission_to_same*100, digits = 2),
"%"))) +
geom_line(aes(x = date,
y = mean_admission_to_same)) +
scale_x_date(date_breaks = "6 months", date_labels = "%b %Y") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size =7)) +
geom_vline(xintercept = as.numeric(as.Date("2008-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2009-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2010-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2011-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2012-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2014-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2015-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2016-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2017-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2018-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2019-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2020-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2021-01-01")), linetype=4, colour = "grey50", alpha = 0.7)+
geom_vline(xintercept = as.numeric(as.Date("2022-01-01")), linetype=4, colour = "grey50", alpha = 0.7) +
labs(title = "Proportion of attendances to selected destination \n",
x = "\n Date",
y = "Proportion of attendances")
winter_plotly %>%
ggplotly(tooltip = "text") %>%
config(displayModeBar = FALSE) %>%
layout(hoverlabel = list(bgcolor = "white"))